﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Text;

using Ivony.Fluent;
using Ivony.Html;
using Ivony.Html.Parser;

using ihongma.Common;
namespace ihongma.Job
{
     /// <summary>
     /// Collects blog posts from the cnblogs listing pages at <see cref="ListingUrlPrefix"/>
     /// and stores each new post through <c>ihongma.BLL.article</c>.
     /// </summary>
     public class JobCollect
     {
         /// <summary>Listing page URL; the page number is appended to it.</summary>
         private const string ListingUrlPrefix = "http://www.cnblogs.com/lhb25/default.html?page=";

         /// <summary>Encoding name passed to the page downloader.</summary>
         private const string PageEncoding = "utf-8";

         /// <summary>Image URL stored when a post body has no usable image.</summary>
         private const string PlaceholderImage = "/upload/null.jpg";

         /// <summary>
         /// Posts whose listing title contains this text are never imported
         /// (the text reads "Recommended best Web front-end development articles").
         /// </summary>
         private const string SkippedSeriesTitle = "Web 前端开发精华文章推荐";

         private readonly ihongma.BLL.article add = new ihongma.BLL.article();
         private readonly JumonyParser parser = new JumonyParser();

         /// <summary>
         /// Processes the listing pages numbered 0 through <paramref name="count"/>.
         /// </summary>
         /// <param name="count">Highest listing page number to process (inclusive).</param>
         public void cnblogs(int count)
         {
             // NOTE(review): the range is 0..count inclusive, so count + 1 pages are requested.
             // Confirm whether page 0 is intended before tightening this bound.
             for (int page = 0; page <= count; page++)
             {
                 url(page);
             }
         }

         /// <summary>
         /// Downloads one listing page and hands every post link found on it to
         /// <see cref="pageinfo"/>, together with the post's summary and link text.
         /// </summary>
         /// <param name="j">Listing page number appended to the listing URL.</param>
         public void url(int j)
         {
             string showhtml = HttpRequestHelper.GetPageSourceByUrl(ListingUrlPrefix + j.ToString(), PageEncoding);
             var dataDocument = parser.Parse(showhtml);

             var links = dataDocument.Find(".postTitle a").ToArray();
             var summaries = dataDocument.Find(".c_b_p_desc").ToArray();

             for (int i = 0; i < links.Length; i++)
             {
                 // Links and summaries are matched by position. A listing may contain
                 // fewer summary elements than links; use an empty summary in that case
                 // instead of indexing past the end of the array.
                 string summary = string.Empty;
                 if (i < summaries.Length)
                 {
                     // Strip the "摘要:" ("Summary:") and "阅读全文" ("Read full text") texts.
                     summary = summaries[i].InnerText().Replace("摘要:", "").Replace("阅读全文", "");
                 }

                 pageinfo(links[i].Attribute("href").Value(), summary, links[i].InnerText());
             }
         }

         /// <summary>
         /// Downloads a single post page and stores it as a new article, unless the listing
         /// title belongs to the skipped series or an article with that title already exists.
         /// </summary>
         /// <param name="usrl">URL of the post page to download.</param>
         /// <param name="temp">Summary text; stored as both the abstract and the SEO description.</param>
         /// <param name="title">Title taken from the listing; used only for the skip and duplicate checks.</param>
         /// <exception cref="InvalidOperationException">
         /// The page does not contain exactly one match for <c>#post-date</c>,
         /// <c>#cnblogs_post_body</c> or <c>.postTitle2</c>.
         /// </exception>
         /// <exception cref="FormatException">The <c>#post-date</c> text cannot be parsed as a date.</exception>
         public void pageinfo(string usrl, string temp, string title)
         {
             if (title.Contains(SkippedSeriesTitle))
             {
                 return;
             }

             if (add.ExistsTitle(title))
             {
                 return;
             }

             string showhtml = HttpRequestHelper.GetPageSourceByUrl(usrl, PageEncoding);
             var dataDocument = parser.Parse(showhtml);
             var body = dataDocument.Find("#cnblogs_post_body");

             // Trim the body before it is serialized: drop every list and the last three paragraphs.
             body.Find("ul").Remove();
             body.Find("p").Reverse().Take(3).Remove();

             // Use the first remaining image as the cover; fall back to the placeholder
             // when there is no image or it carries no usable src value.
             string img = PlaceholderImage;
             var firstImage = body.Find("img").FirstOrDefault();
             if (firstImage != null)
             {
                 var source = firstImage.Attribute("src");
                 if (source != null)
                 {
                     string sourceValue = source.Value();
                     if (!string.IsNullOrEmpty(sourceValue))
                     {
                         img = sourceValue;
                     }
                 }
             }

             // NOTE(review): DateTime.Parse uses the current thread culture — confirm that the
             // #post-date text is in a format that parses the same way on every host.
             DateTime publishedAt = DateTime.Parse(dataDocument.Find("#post-date").Single().InnerText());

             // Remove the "Related articles you may be interested in" and "You may also like" texts.
             string content = body.Single().ToString().Replace("您可能感兴趣的相关文章", "").Replace("您可能还喜欢", "");

             // The stored title comes from the post page itself; it is reused for the SEO fields.
             string pageTitle = dataDocument.Find(".postTitle2").Single().InnerText();

             ihongma.Model.article ar = new ihongma.Model.article()
             {
                 add_time = publishedAt,
                 author = "colt",
                 category_id = 350,
                 channel_id = 1,
                 zhaiyao = temp,
                 click = 100,
                 content = content,
                 title = pageTitle,
                 digg_act = 0,
                 digg_good = 0,
                 form = "鸿马",
                 img_url = img,
                 is_hot = 0,
                 is_lock = 0,
                 is_msg = 0,
                 is_red = 0,
                 is_slide = 0,
                 is_top = 0,
                 seo_description = temp,
                 seo_keywords = pageTitle,
                 seo_title = pageTitle,
                 sort_id = 99,
                 link_url = ""
             };
             add.Add(ar);
         }
     }
}
